/****************************************************

LeaseToSplitEstate

Generates the master list for city and street names

inputs:

outputs:
	
****************************************************/

clear
/*#delimit;*/
set mem 1000m
pause on
set more off

local source = 1
if `source' == 0 {
	cd "C:\Users\Ashley Vissing\Google Drive\personal"

	local pathtad = "G:\GeneralData\HousingData\AppraisalData\Tarrant\"
	local pathdq = "G:\GeneralData\HousingData\Dataquick\TX\Tarrant\"
	local patharea = "G:\NaturalGasData\AreaFiles/AAAA_gis/"

	local pathmerge = "G:/NaturalGasData/StringMatch/"
	local pathbundle = "G:/NaturalGasData/Tarrant_PermitBundles/MatchedLeasesToPermit_11122014/"
	local pathpermit = "G:/NaturalGasData/FuProject/Permit/dta/"
	local pathout = "G:/NaturalGasData/Tarrant_DataCleaning/Data10042016_small/"
	local pathwell = "G:/GeneralData/Drilling_info_data/Texas/Barnett/"

}
else if `source' == 1 {
	cd "/Users/ashley/Google Drive/personal"

	local pathout = "/Volumes/My Passport/NaturalGasData/StringMatch/dta/"
	local pathtad = "/Volumes/My Passport/GeneralData/HousingData/AppraisalData/Tarrant/"
	local pathDQ = "/Volumes/My Passport/NaturalGasData/Dataquick/"
	local pathnyt = "/Volumes/My Passport/NaturalGasData/NYTimes_LeaseData/LeaseData_dta/"
	local pathpermit = "/Volumes/My Passport/NaturalGasData/FuProject/Permit/dta/"
	local pathwell = "/Volumes/My Passport/GeneralData/Drilling_info_data/Texas/Barnett/"
	local pathout = "/Volumes/My Passport/NaturalGasData/Tarrant_DataCleaning/Data10042016_small/"

}


tempfile all split lease bundle subdivision abstract permit permitmin property aaaa
tempfile leasefill permitFULL buffer wellpad buffer op operator api prod


*********************************************************************
* Wellpads
/*
use `"`pathpermit'WellpadIDs_2017.dta"'

sort api
save `wellpad',replace

********************************************************************************
* Merge cumulative production of nearest well
********************************************************************************

use `"`pathwell'ProdDataCum_Barnett.dta"'

drop if api_prod == ""
drop if regexm(api_prod,"--")
drop entity_* 
drop year month
rename api_prod api
gen production = 1

sort api
save `prod',replace

use `"`pathwell'Permits_gis_Barnett_08262016.dta"'
keep if regexm(county,"tarrant")
drop if api == ""

tostring issueyear issuemonth issueday, replace
replace dateissued = issueyear + "/" + issueday + "/" + issuemonth
replace dateissued = "" if dateissued == "././."
destring issueyear issuemonth issueday, replace

drop if issueyear < 1977
replace county = subinstr(county," (tx)","",.)

replace well_depth = total_depth if well_depth == 0 | well_depth == .
keep api longitude latitude *_type dateissued contig_acres issue* well_depth rule* exception horizontal horz_dist county status
replace horz_dist = . if horz_dist == 0
sort api issueyear horz_dist
by api issueyear: carryforward horz_dist, replace
sort api well_depth
by api: carryforward well_depth, replace
duplicates drop
duplicates drop api dateissued well_depth contig_acres horz_dist, force
gsort api -issueyear issuemonth
duplicates drop api issueyear issuemonth, force

sort api
save `permit',replace

use `"`pathwell'production_Barnett_04132016.dta"'
*keep if regexm(county,"tarrant") | regexm(county,"denton")
keep api county longitude latitude *_type spuddate firstproddate completiondate firstprodyear lastprodyear spudyear cumoil cumgas first60oil first60gas first24oil first24gas horizontallength operator*
duplicates drop
drop if api == ""
replace cumgas = . if cumgas == 0
replace cumoil = . if cumoil == 0
duplicates drop api firstprodyear cumgas cumoil operator, force

sort api firstprodyear
by api: gen ct = _n
drop if ct > 1 & firstprodyear == .
gsort api -firstprodyear
duplicates drop api, force
drop ct

sort api
merge api using `prod', update replace
tab _merge
drop _merge

drop *3mon_* *6mon_*

sort api
merge api using `permit', update
tab _merge
drop _merge

sort api
merge api using `wellpad'
tab _merge

********************************************************************************
* Construct new wellpads that include permits only

gen wellpadnew = .
egen api_group = group(api)

local bound = 300

sort wellpadID longitude latitude
gen long_plus = longitude[_n+1]
gen lat_plus = latitude[_n+1]

vincenty lat_plus long_plus latitude longitude if longitude != . & long_plus != . , hav(longdist) inkm
replace  longdist = longdist*1000
gen tag = 1 if longdist < `bound'

replace wellpadnew = api_group*tag if wellpadnew == . & tag[_n+1] == .

local i = 1 
while `i' < 20 {
replace wellpadnew = wellpadnew[_n-1] if wellpadnew == . & tag[_n-1] == 1
replace wellpadnew = wellpadnew[_n+1] if wellpadnew == . & tag == 1

replace wellpadID = wellpadID[_n-1] if wellpadID == . & tag[_n-1] == 1
replace wellpadID = wellpadID[_n+1] if wellpadID == . & tag == 1
local i = `i' + 1
}

drop longdist tag *_plus

sort wellpadID latitude longitude 
gen long_plus = longitude[_n+1]
gen lat_plus = latitude[_n+1]

vincenty lat_plus long_plus latitude longitude if longitude != . & long_plus != . , hav(longdist) inkm
replace  longdist = longdist*1000
gen tag = 1 if longdist < `bound'

local i = 1 
while `i' < 20 {
replace wellpadnew = wellpadnew[_n-1] if wellpadnew == . & tag[_n-1] == 1
replace wellpadnew = wellpadnew[_n+1] if wellpadnew == . & tag == 1
local i = `i' + 1
}

replace wellpadnew = api_group*tag if wellpadnew == . & tag[_n+1] == .

local i = 1 
while `i' < 20 {
replace wellpadnew = wellpadnew[_n-1] if wellpadnew == . & tag[_n-1] == 1
replace wellpadnew = wellpadnew[_n+1] if wellpadnew == . & tag == 1

replace wellpadID = wellpadID[_n-1] if wellpadID == . & tag[_n-1] == 1
replace wellpadID = wellpadID[_n+1] if wellpadID == . & tag == 1
local i = `i' + 1
}

drop longdist tag *_plus

local j = 1
while `j' < 4 {

sort api latitude longitude 
gen long_plus = longitude[_n+1]
gen lat_plus = latitude[_n+1]

vincenty lat_plus long_plus latitude longitude if longitude != . & long_plus != . , hav(longdist) inkm
replace  longdist = longdist*1000
gen tag = 1 if longdist < `bound'

local i = 1 
while `i' < 20 {
replace wellpadnew = wellpadnew[_n-1] if wellpadnew == . & tag[_n-1] == 1
replace wellpadnew = wellpadnew[_n+1] if wellpadnew == . & tag == 1
local i = `i' + 1
}

replace wellpadnew = api_group*tag if wellpadnew == . & tag[_n+1] == .

local i = 1 
while `i' < 20 {
replace wellpadnew = wellpadnew[_n-1] if wellpadnew == . & tag[_n-1] == 1
replace wellpadnew = wellpadnew[_n+1] if wellpadnew == . & tag == 1

replace wellpadID = wellpadID[_n-1] if wellpadID == . & tag[_n-1] == 1
replace wellpadID = wellpadID[_n+1] if wellpadID == . & tag == 1
local i = `i' + 1
}

drop longdist tag *_plus

sort api longitude latitude  
gen long_plus = longitude[_n+1]
gen lat_plus = latitude[_n+1]

vincenty lat_plus long_plus latitude longitude if longitude != . & long_plus != . , hav(longdist) inkm
replace  longdist = longdist*1000
gen tag = 1 if longdist < `bound'

local i = 1 
while `i' < 20 {
replace wellpadnew = wellpadnew[_n-1] if wellpadnew == . & tag[_n-1] == 1
replace wellpadnew = wellpadnew[_n+1] if wellpadnew == . & tag == 1
local i = `i' + 1
}

replace wellpadnew = api_group*tag if wellpadnew == . & tag[_n+1] == .

local i = 1 
while `i' < 20 {
replace wellpadnew = wellpadnew[_n-1] if wellpadnew == . & tag[_n-1] == 1
replace wellpadnew = wellpadnew[_n+1] if wellpadnew == . & tag == 1

replace wellpadID = wellpadID[_n-1] if wellpadID == . & tag[_n-1] == 1
replace wellpadID = wellpadID[_n+1] if wellpadID == . & tag == 1
local i = `i' + 1
}

drop longdist tag *_plus
local j = `j' + 1
}

local j = 1
while `j' < 4 {
sort latitude longitude 
gen long_plus = longitude[_n+1]
gen lat_plus = latitude[_n+1]

vincenty lat_plus long_plus latitude longitude if longitude != . & long_plus != . , hav(longdist) inkm
replace  longdist = longdist*1000
gen tag = 1 if longdist < `bound'

local i = 1 
while `i' < 20 {
replace wellpadnew = wellpadnew[_n-1] if wellpadnew == . & tag[_n-1] == 1
replace wellpadnew = wellpadnew[_n+1] if wellpadnew == . & tag == 1
local i = `i' + 1
}

replace wellpadnew = api_group*tag if wellpadnew == . & tag[_n+1] == .

local i = 1 
while `i' < 20 {
replace wellpadnew = wellpadnew[_n-1] if wellpadnew == . & tag[_n-1] == 1
replace wellpadnew = wellpadnew[_n+1] if wellpadnew == . & tag == 1

replace wellpadID = wellpadID[_n-1] if wellpadID == . & tag[_n-1] == 1
replace wellpadID = wellpadID[_n+1] if wellpadID == . & tag == 1
local i = `i' + 1
}

drop longdist tag *_plus


sort longitude latitude  
gen long_plus = longitude[_n+1]
gen lat_plus = latitude[_n+1]

vincenty lat_plus long_plus latitude longitude if longitude != . & long_plus != . , hav(longdist) inkm
replace  longdist = longdist*1000
gen tag = 1 if longdist < `bound'

local i = 1 
while `i' < 20 {
replace wellpadnew = wellpadnew[_n-1] if wellpadnew == . & tag[_n-1] == 1
replace wellpadnew = wellpadnew[_n+1] if wellpadnew == . & tag == 1
local i = `i' + 1
}

replace wellpadnew = api_group*tag if wellpadnew == . & tag[_n+1] == .

local i = 1 
while `i' < 20 {
replace wellpadnew = wellpadnew[_n-1] if wellpadnew == . & tag[_n-1] == 1
replace wellpadnew = wellpadnew[_n+1] if wellpadnew == . & tag == 1

replace wellpadID = wellpadID[_n-1] if wellpadID == . & tag[_n-1] == 1
replace wellpadID = wellpadID[_n+1] if wellpadID == . & tag == 1
local i = `i' + 1
}

drop longdist tag *_plus
local j = `j' + 1
}

local j = 1
while `j' < 4 {

sort wellpadID latitude longitude 
gen long_plus = longitude[_n+1]
gen lat_plus = latitude[_n+1]

vincenty lat_plus long_plus latitude longitude if longitude != . & long_plus != . , hav(longdist) inkm
replace  longdist = longdist*1000
local bound = 300
gen tag = 1 if longdist < `bound'

local i = 1 
while `i' < 20 {
replace wellpadnew = wellpadnew[_n-1] if wellpadnew == . & tag[_n-1] == 1
replace wellpadnew = wellpadnew[_n+1] if wellpadnew == . & tag == 1
local i = `i' + 1
}

replace wellpadnew = api_group*tag if wellpadnew == . & tag[_n+1] == .

local i = 1 
while `i' < 20 {
replace wellpadnew = wellpadnew[_n-1] if wellpadnew == . & tag[_n-1] == 1
replace wellpadnew = wellpadnew[_n+1] if wellpadnew == . & tag == 1

replace wellpadID = wellpadID[_n-1] if wellpadID == . & tag[_n-1] == 1
replace wellpadID = wellpadID[_n+1] if wellpadID == . & tag == 1
local i = `i' + 1
}

drop longdist tag *_plus

sort wellpadID longitude latitude  
gen long_plus = longitude[_n+1]
gen lat_plus = latitude[_n+1]

vincenty lat_plus long_plus latitude longitude if longitude != . & long_plus != . , hav(longdist) inkm
replace  longdist = longdist*1000
gen tag = 1 if longdist < `bound'

local i = 1 
while `i' < 20 {
replace wellpadnew = wellpadnew[_n-1] if wellpadnew == . & tag[_n-1] == 1
replace wellpadnew = wellpadnew[_n+1] if wellpadnew == . & tag == 1
local i = `i' + 1
}

replace wellpadnew = api_group*tag if wellpadnew == . & tag[_n+1] == .

local i = 1 
while `i' < 20 {
replace wellpadnew = wellpadnew[_n-1] if wellpadnew == . & tag[_n-1] == 1
replace wellpadnew = wellpadnew[_n+1] if wellpadnew == . & tag == 1

replace wellpadID = wellpadID[_n-1] if wellpadID == . & tag[_n-1] == 1
replace wellpadID = wellpadID[_n+1] if wellpadID == . & tag == 1
local i = `i' + 1
}

drop longdist tag *_plus
local j = `j' + 1
}

replace wellpadnew = api_group if wellpadnew == .
drop api_group
bysort wellpadnew: egen lat_cent = mean(latitude)
bysort wellpadnew: egen long_cent = mean(longitude)

*replace wellpadID = wellpadID[_n+1] if wellpadID[_n+1] != . & tag[_n+1] == 1

*drop if _merge != 3
drop _merge

sort api
save `api', replace

********************************************************************************
* Operator name edits to match lease data

gen firm = lower(operatorOLD)

OperatorClean firm operatorOLD, orgvar(1)
replace firm = "" if firm == "na"
replace operator = firm
drop firm

replace county = subinstr(county," (tx)","",.)
gen diff = firstprodyear - issueyear
replace diff = . if diff < 0
sort api firstprodyear diff

duplicates drop api firstprodyear api_*, force
drop diff

egen firmNUM = group(operator)

gen counter = 1
bysort operator: egen opwellcount = sum(counter) if operator != ""
sum opwellcount if firmNUM != 0 & operator != "", det
gen firmNUMsm = 0 if opwellcount < `r(p50)' & operator != ""
replace firmNUMsm = firmNUM if firmNUMsm == .
drop counter

saveold `"`pathout'WellpadUpdate_permits&prod_04192018.dta"',replace version(12)
stop
*/
********************************************************************************

use `"`pathout'WellpadUpdate_permits&prod_04192018.dta"'

stop

/*
drop if operator == ""
drop if firstproddate == ""
split firstproddate, parse("-") gen(prod)
gen firstprodmonth = prod1 if length(prod3) == 4
replace firstprodmonth = prod2 if firstprodmonth == ""
drop prod1 - prod3
destring firstprodmonth, replace

sort operator firstprodyear firstprodmonth
by operator: gen firstwell = _n

gen counter = 1
bysort operator: egen temp = sum(counter)
sum temp if firstwell == 1, det
gen smallfirm = 1 if temp < r(mean)
hist temp if temp > 100 & firstwell == 1, width(50)
replace smallfirm = 1 if temp < 300
drop counter
drop temp

replace operator = "smallfirm" if smallfirm == 1

gen tarrantID = 1 if county == "tarrant"
replace tarrantID = 0 if tarrantID == .
sort operator tarrantID firstprodyear firstprodmonth 
by operator tarrantID: gen firstwell_tarrant = _n

gen counter = 1
bysort operator: egen wellcount_pre2004 = sum(counter) if firstprodyear < 2004
bysort operator: egen wellcount_post2004 = sum(counter) if firstprodyear >= 2004
sort operator wellcount_pre2004
by operator: carryforward wellcount_pre2004, replace
sort operator wellcount_post2004
by operator: carryforward wellcount_post2004, replace

bysort operator tarrantID: egen wellcount_tar_pre2004 = sum(counter) if firstprodyear < 2004
bysort operator tarrantID: egen wellcount_tar_post2004 = sum(counter) if firstprodyear >= 2004
sort operator tarrantID wellcount_tar_pre2004
by operator tarrantID: carryforward wellcount_tar_pre2004, replace
sort operator tarrantID wellcount_tar_post2004
by operator tarrantID: carryforward wellcount_tar_post2004, replace

gen direct = 0
replace direct = 1 if drill_type == "H" | drill_type == "D"

bysort operator: egen wellcount_dir_pre2004 = sum(counter) if firstprodyear < 2004 & direct == 1
bysort operator: egen wellcount_dir_post2004 = sum(counter) if firstprodyear >= 2004 & direct == 1
sort operator wellcount_dir_pre2004
by operator: carryforward wellcount_dir_pre2004, replace
sort operator wellcount_dir_post2004
by operator: carryforward wellcount_dir_post2004, replace

********************************************************************************
* Figure 1: Drilling Activity in the Barnett Shale

graph bar wellcount_dir_pre2004 wellcount_dir_post2004 if smallfirm == . & firstwell == 1, over(operator, /*
*/ label(angle(45) labsize(vsmall))) legend(label(1 "Pre-2004") label(2 "Post-2003") size(vsmall)) /*
*/ graphregion(color(white)) bgcolor(white) ylabel(,nogrid) ylabel(,labsize(vsmall))

graph bar wellcount_pre2004 wellcount_post2004 wellcount_dir_post2004 if smallfirm == . & firstwell == 1, over(operator, /*
*/ label(angle(45) labsize(vsmall))) legend(label(1 "Pre-2004") label(2 "Post-2003") size(vsmall)) /*
*/ graphregion(color(white)) bgcolor(white) ylabel(,nogrid) ylabel(,labsize(vsmall))

graph bar wellcount_pre2004 wellcount_post2004 wellcount_dir_post2004 if smallfirm == . & firstwell_tarrant == 1 & tarrantID == 1, over(operator, /*
*/ label(angle(45) labsize(vsmall))) legend(label(1 "Pre-2004") label(2 "Post-2003") label(3 "Horizontal - Post-2003") col(3) size(vsmall)) /*
*/ graphregion(color(white)) bgcolor(white) ylabel(,nogrid) ylabel(,labsize(vsmall)) name(hold, replace)

graph combine hold, saving(hold,replace) 
graph export DrillingAct_All.png, replace

graph bar wellcount_tar_pre2004 wellcount_tar_post2004 if firstwell_tarrant == 1 & tarrantID == 1, over(operator, /*
*/ label(angle(45) labsize(vsmall))) legend(label(1 "Pre-2004") label(2 "Post-2003") size(vsmall)) /*
*/ graphregion(color(white)) bgcolor(white) ylabel(,nogrid) ylabel(,labsize(vsmall)) name(hold, replace)

graph combine hold, saving(hold,replace) 
graph export DrillingAct_Tarrant.png, replace

stop
*/

save `permit',replace

********************************************************************************
* Reshape the operators of the well by api

keep operator api
duplicates drop
drop if operator == ""
sort api
save `op',replace

clear
use `permit'
sort api
merge api using `op'
tab _merge
drop _merge

* Reshape issuedates
keep api issue* dateissued
duplicates drop

gsort api -issueyear -issuemonth -issueday
by api: gen oporder = _n
drop issueyear issuemonth issueday
reshape wide dateissued, i(api) j(oporder)
sort api
save `op',replace

clear
use `permit'
sort api
merge api using `op'
tab _merge
drop _merge
drop dateissued
duplicates drop

tab status
gen status_dum = 1 if status == "completed"
replace status_dum = 1 if status == "active"
tab status_dum

saveold `"`pathout'PermitDates_04192018.dta"',replace version(12)

********************************************************************************
* Firm specific driling variables

clear
use `permit'

drop if operator == ""

gen yeardum = 1 if firstprodyear < 2004
drop if firstprodyear == .
tab firstprodyear

duplicates drop api firstproddate, force
split firstproddate, parse("-") gen(prod)
gen firstprodmonth = prod1 if length(prod3) == 4
replace firstprodmonth = prod2 if firstprodmonth == ""
drop prod1 - prod3
destring firstprodmonth, replace

sort api firstprodyear firstprodmonth
duplicates drop api operator, force

gsort api -firstprodyear -firstprodmonth
duplicates drop api, force

gen direct = 0
replace direct = 1 if drill_type == "H" | drill_type == "D"
gen gas = 0
replace gas = 1 if prod_type == "GAS" | permit_type == "GAS"

tab county

gen temp = 1 if regexm(county,"tarrant")
bysort operator: egen op_prod_TCwells = sum(production) if temp == 1
bysort operator: egen op_prod_wells = sum(production)   if temp == .
bysort operator: egen op_prodgas_TCwells = sum(production) if temp == 1 & gas == 1
bysort operator: egen op_prodgas_wells = sum(production)   if temp == . & gas == 1
bysort operator: egen op_proddir_TCwells = sum(production) if temp == 1 & direct == 1
bysort operator: egen op_proddir_wells = sum(production)   if temp == . & direct == 1

bysort operator: egen op_prod_TCwells_2003 = sum(production) if temp == 1 & firstprodyear < 2004
bysort operator: egen op_prod_wells_2003 = sum(production)   if temp == . & firstprodyear < 2004
bysort operator: egen op_prodgas_TCwells_2003 = sum(production) if temp == 1 & gas == 1 & firstprodyear < 2004
bysort operator: egen op_prodgas_wells_2003 = sum(production)   if temp == . & gas == 1 & firstprodyear < 2004
bysort operator: egen op_proddir_TCwells_2003 = sum(production) if temp == 1 & direct == 1 & firstprodyear < 2004
bysort operator: egen op_proddir_wells_2003 = sum(production)   if temp == . & direct == 1 & firstprodyear < 2004


foreach var of varlist op_prod* {
	sort operator `var'
	by operator: carryforward `var',replace
	replace `var' = 0 if `var' == .
}

gen smallFirm = 1 if firmNUMsm == 0

foreach var of varlist op_prod_TCwells op_prodgas_TCwells op_proddir_TCwells {
egen `var'_sm = mean(`var') if smallFirm == 1 & temp == 1
sort `var'_sm
carryforward `var'_sm, replace
}
foreach var of varlist op_prod_wells op_prodgas_wells op_proddir_wells {
egen `var'_sm = mean(`var') if smallFirm == 1 & temp != 1
sort `var'_sm
carryforward `var'_sm, replace
}

foreach var of varlist op_prod_wells_2003 op_prodgas_wells_2003 op_proddir_wells_2003 {
egen `var'_sm = mean(`var') if smallFirm == 1 & firstprodyear < 2004
sort `var'_sm
carryforward `var'_sm, replace
}

gsort operator temp
duplicates drop operator, force

drop temp

keep operator op_prod* smallFirm firmNUM*
replace smallFirm = 0 if smallFirm == .
tab smallFirm
duplicates drop
saveold `"`pathout'OperatorDrilling_04192018.dta"',replace version(12)

